Convolutional architecture

In [ ]:
!pip install librosa
In [ ]:
import numpy as np
import torchvision
import torchvision.transforms as transforms
import torch
from torch import nn, optim
from torch.utils import data
import random
from random import randrange
from time import time
import librosa
from IPython.display import Audio
from scipy.signal import stft
from scipy.signal import istft
import scipy as sp
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
In [ ]:
# Prepend 19 near-silent frames to the (transposed) noisy magnitude spectrogram
# X_T from the previous section, so that each of its 2459 frames gets a full
# 20-frame context window (padded input: 2478 by 513)
rand_X = np.random.normal(0, 0.001, (19, 513))
rand_X_abs = np.abs(rand_X)
X_T1 = np.concatenate((rand_X_abs, X_T))
In [ ]:
# Create 20-by-513 input patches and the matching 1-by-513 clean target frames
X_patch = []
Y_patch = []
for i in range(S_T.shape[0]):
    X_patch.append(X_T1[i:i+20, :])
    Y_patch.append(S_T[i, :])
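As a quick sanity check (a throwaway sketch, not part of the original pipeline, assuming X_T has 2459 frames as in the padding cell above), the last row of the first patch should line up with the first real noisy frame:

In [ ]:
print(len(X_patch), X_patch[0].shape)   # expected: 2459 (20, 513)
print(len(Y_patch), Y_patch[0].shape)   # expected: 2459 (513,)
# The 19 padded rows come first, so row 19 of patch 0 is the first noisy frame
assert np.allclose(X_patch[0][-1], X_T[0])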
In [ ]:
# Create tensor objects from the patches; stack the lists into single arrays
# first, since torch.tensor on a list of NumPy arrays is slow and raises a warning

X_tensor = torch.tensor(np.stack(X_patch), dtype=torch.float32).to(device)
Y_tensor = torch.tensor(np.stack(Y_patch), dtype=torch.float32).to(device)
In [ ]:
# Load train data with batch size of 128

train_dataset = torch.utils.data.TensorDataset(X_tensor,Y_tensor)
trainloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
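Pulling one batch from the loader confirms the shapes the network will see (a throwaway check; only the final, smaller batch differs):

In [ ]:
xb, yb = next(iter(trainloader))
print(xb.shape, yb.shape)   # expected: torch.Size([128, 20, 513]) torch.Size([128, 513])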
In [ ]:
# Define the model architecture: two conv blocks with pooling, followed by a
# two-layer MLP head mapping the flattened features to one 513-bin frame.
# The ReLUs directly after the pooling layers are effectively no-ops (their
# inputs are already non-negative) but are kept to match the trained model.

model = nn.Sequential(nn.Conv2d(1, 16, kernel_size=(3, 3), stride=1),
                      nn.ReLU(),
                      nn.Conv2d(16, 32, kernel_size=(3, 3), stride=1),
                      nn.ReLU(),
                      nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                      nn.ReLU(),
                      nn.Conv2d(32, 6, kernel_size=(3, 3), stride=1),
                      nn.ReLU(),
                      nn.AvgPool2d(kernel_size=(2, 2), stride=2),
                      nn.ReLU(),
                      nn.Flatten(),
                      nn.Linear(2268, 1024),
                      nn.ReLU(),
                      nn.Linear(1024, 513),
                      nn.ReLU(),  # final ReLU keeps predicted magnitudes non-negative
                      ).to(device)

print(model)
Sequential(
  (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (3): ReLU()
  (4): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): ReLU()
  (6): Conv2d(32, 6, kernel_size=(3, 3), stride=(1, 1))
  (7): ReLU()
  (8): AvgPool2d(kernel_size=(2, 2), stride=2, padding=0)
  (9): ReLU()
  (10): Flatten()
  (11): Linear(in_features=2268, out_features=1024, bias=True)
  (12): ReLU()
  (13): Linear(in_features=1024, out_features=513, bias=True)
  (14): ReLU()
)
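The in_features=2268 of the first linear layer follows from tracing the 20-by-513 input through the stack: each 3-by-3 convolution trims 2 from each spatial dimension and each pooling halves them (floor division), so 20x513 -> 18x511 -> 16x509 -> 8x254 -> 6x252 -> 3x126, and 6 channels x 3 x 126 = 2268. A dummy forward pass through the convolutional part of the Sequential verifies this:

In [ ]:
with torch.no_grad():
    dummy = torch.zeros(1, 1, 20, 513, device=device)
    # slice up to (not including) the first Linear layer at index 11
    print(model[:11](dummy).shape)   # expected: torch.Size([1, 2268])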
In [ ]:
# Use mean squared error as the loss function
criterion = nn.MSELoss()
In [ ]:
optimizer = optim.Adam(model.parameters())
start = time()
epochs = 200
for i in range(epochs):
    running_loss = 0
    for x, y in trainloader:
        # x and y are already on the GPU (the dataset tensors were moved above)
        optimizer.zero_grad()

        # Add the singleton channel dimension expected by Conv2d
        output = model(x.view(-1, 1, 20, 513))
        loss = criterion(output, y)

        # Backpropagate the loss...
        loss.backward()

        # ...and update the weights
        optimizer.step()

        running_loss += loss.item()
    print("Epoch {} - Training loss: {}".format(i, running_loss/len(trainloader)))
print("\nTraining Time =", time()-start)
Epoch 0 - Training loss: 0.08960044607520104
Epoch 1 - Training loss: 0.07692374847829342
Epoch 2 - Training loss: 0.06757998941466212
Epoch 3 - Training loss: 0.0633871553465724
Epoch 4 - Training loss: 0.057708676904439926
Epoch 5 - Training loss: 0.05119535271078348
Epoch 6 - Training loss: 0.04354371698573232
Epoch 7 - Training loss: 0.038396379072219135
Epoch 8 - Training loss: 0.03499619048088789
Epoch 9 - Training loss: 0.030829435028135777
Epoch 10 - Training loss: 0.02677404214628041
Epoch 11 - Training loss: 0.02420265320688486
Epoch 12 - Training loss: 0.02327483459375799
Epoch 13 - Training loss: 0.0215888031758368
Epoch 14 - Training loss: 0.01898319725878537
Epoch 15 - Training loss: 0.017809338122606277
Epoch 16 - Training loss: 0.01638960442505777
Epoch 17 - Training loss: 0.014857350941747427
Epoch 18 - Training loss: 0.013791473000310361
Epoch 19 - Training loss: 0.013419375289231539
Epoch 20 - Training loss: 0.012355825584381818
Epoch 21 - Training loss: 0.012403935985639691
Epoch 22 - Training loss: 0.012949306005612015
Epoch 23 - Training loss: 0.01237433385103941
Epoch 24 - Training loss: 0.010721345595084131
Epoch 25 - Training loss: 0.009663497400470077
Epoch 26 - Training loss: 0.010099840955808759
Epoch 27 - Training loss: 0.008982868329621852
Epoch 28 - Training loss: 0.008182818023487925
Epoch 29 - Training loss: 0.007596045476384461
Epoch 30 - Training loss: 0.007350369356572628
Epoch 31 - Training loss: 0.007215046766214072
Epoch 32 - Training loss: 0.006954016932286322
Epoch 33 - Training loss: 0.006841205735690891
Epoch 34 - Training loss: 0.006776953907683491
Epoch 35 - Training loss: 0.006603176379576326
Epoch 36 - Training loss: 0.006298037036322058
Epoch 37 - Training loss: 0.006204719864763319
Epoch 38 - Training loss: 0.0061774237779900435
Epoch 39 - Training loss: 0.005783315654844046
Epoch 40 - Training loss: 0.005772712593898177
Epoch 41 - Training loss: 0.005989000387489796
Epoch 42 - Training loss: 0.006164757756050676
Epoch 43 - Training loss: 0.005924312956631183
Epoch 44 - Training loss: 0.005783104640431702
Epoch 45 - Training loss: 0.005430000484921038
Epoch 46 - Training loss: 0.005374475591816008
Epoch 47 - Training loss: 0.005162075813859701
Epoch 48 - Training loss: 0.004794538835994899
Epoch 49 - Training loss: 0.005146704381331801
Epoch 50 - Training loss: 0.00537431271513924
Epoch 51 - Training loss: 0.005101392115466297
Epoch 52 - Training loss: 0.004627743968740106
Epoch 53 - Training loss: 0.00460779428249225
Epoch 54 - Training loss: 0.004324073949828744
Epoch 55 - Training loss: 0.004119636514224112
Epoch 56 - Training loss: 0.0041871244204230605
Epoch 57 - Training loss: 0.004939522012136877
Epoch 58 - Training loss: 0.004986259713768959
Epoch 59 - Training loss: 0.0044755458133295175
Epoch 60 - Training loss: 0.004129680967889726
Epoch 61 - Training loss: 0.004015388002153486
Epoch 62 - Training loss: 0.0038245291565544904
Epoch 63 - Training loss: 0.0035405512200668452
Epoch 64 - Training loss: 0.003281500202137977
Epoch 65 - Training loss: 0.0032494075363501906
Epoch 66 - Training loss: 0.003262067667674273
Epoch 67 - Training loss: 0.0032411444932222365
Epoch 68 - Training loss: 0.0030702991876751184
Epoch 69 - Training loss: 0.003099811030551791
Epoch 70 - Training loss: 0.0031981506501324473
Epoch 71 - Training loss: 0.0034721776260994374
Epoch 72 - Training loss: 0.0033926387433893977
Epoch 73 - Training loss: 0.0032583302003331483
Epoch 74 - Training loss: 0.003172050853027031
Epoch 75 - Training loss: 0.0031980758998543025
Epoch 76 - Training loss: 0.0031959283747710287
Epoch 77 - Training loss: 0.003558703127782792
Epoch 78 - Training loss: 0.00350054221926257
Epoch 79 - Training loss: 0.0036597360274754466
Epoch 80 - Training loss: 0.0034468490863218903
Epoch 81 - Training loss: 0.003404967300593853
Epoch 82 - Training loss: 0.0032683825585991142
Epoch 83 - Training loss: 0.0029949706862680615
Epoch 84 - Training loss: 0.0029137971461750565
Epoch 85 - Training loss: 0.002884552686009556
Epoch 86 - Training loss: 0.00291144564980641
Epoch 87 - Training loss: 0.003384544688742608
Epoch 88 - Training loss: 0.005281525850296021
Epoch 89 - Training loss: 0.007409580692183226
Epoch 90 - Training loss: 0.007002595369704068
Epoch 91 - Training loss: 0.004937607969623059
Epoch 92 - Training loss: 0.0039409395423717795
Epoch 93 - Training loss: 0.0035370469791814684
Epoch 94 - Training loss: 0.003070985258091241
Epoch 95 - Training loss: 0.0028744264505803584
Epoch 96 - Training loss: 0.0026550271024461834
Epoch 97 - Training loss: 0.002601450018119067
Epoch 98 - Training loss: 0.002522642177063972
Epoch 99 - Training loss: 0.0025524458615109324
Epoch 100 - Training loss: 0.0025739115313626825
Epoch 101 - Training loss: 0.002744760981295258
Epoch 102 - Training loss: 0.00263963709003292
Epoch 103 - Training loss: 0.002501634875079617
Epoch 104 - Training loss: 0.002504251932259649
Epoch 105 - Training loss: 0.00244369272259064
Epoch 106 - Training loss: 0.0025304397684521975
Epoch 107 - Training loss: 0.0025530801911372693
Epoch 108 - Training loss: 0.002726904657902196
Epoch 109 - Training loss: 0.0028170543315354734
Epoch 110 - Training loss: 0.002865866955835372
Epoch 111 - Training loss: 0.0029244979843497275
Epoch 112 - Training loss: 0.0031177293276414275
Epoch 113 - Training loss: 0.0037582907476462423
Epoch 114 - Training loss: 0.0036521023488603533
Epoch 115 - Training loss: 0.004240005114115775
Epoch 116 - Training loss: 0.003926652926020324
Epoch 117 - Training loss: 0.0032240427564829586
Epoch 118 - Training loss: 0.0028397366579156367
Epoch 119 - Training loss: 0.002522772818338126
Epoch 120 - Training loss: 0.0022827438486274334
Epoch 121 - Training loss: 0.002203579235356301
Epoch 122 - Training loss: 0.002124531945446506
Epoch 123 - Training loss: 0.002084541990188882
Epoch 124 - Training loss: 0.002042539097601548
Epoch 125 - Training loss: 0.0020401257497724146
Epoch 126 - Training loss: 0.001991442736471072
Epoch 127 - Training loss: 0.0019065075495745987
Epoch 128 - Training loss: 0.001861713215475902
Epoch 129 - Training loss: 0.0018353733641561121
Epoch 130 - Training loss: 0.001830516167683527
Epoch 131 - Training loss: 0.0018210183479823172
Epoch 132 - Training loss: 0.00191130957682617
Epoch 133 - Training loss: 0.0018649717909283935
Epoch 134 - Training loss: 0.0018420083622913807
Epoch 135 - Training loss: 0.0018643530551344157
Epoch 136 - Training loss: 0.0018345362332183868
Epoch 137 - Training loss: 0.0018844411533791572
Epoch 138 - Training loss: 0.0020143646514043214
Epoch 139 - Training loss: 0.002029091096483171
Epoch 140 - Training loss: 0.0020080958056496456
Epoch 141 - Training loss: 0.002052333898609504
Epoch 142 - Training loss: 0.002067831775639206
Epoch 143 - Training loss: 0.002096139872446656
Epoch 144 - Training loss: 0.002129021246219054
Epoch 145 - Training loss: 0.002321810560533777
Epoch 146 - Training loss: 0.0023628241615369916
Epoch 147 - Training loss: 0.002319197583710775
Epoch 148 - Training loss: 0.002273774764034897
Epoch 149 - Training loss: 0.002217542938888073
Epoch 150 - Training loss: 0.0022967777971643955
Epoch 151 - Training loss: 0.0022727206873241813
Epoch 152 - Training loss: 0.002344411925878376
Epoch 153 - Training loss: 0.00230252004112117
Epoch 154 - Training loss: 0.0023134817543905228
Epoch 155 - Training loss: 0.0022950537502765657
Epoch 156 - Training loss: 0.0022920548100955783
Epoch 157 - Training loss: 0.0022179708466865122
Epoch 158 - Training loss: 0.0021959305449854583
Epoch 159 - Training loss: 0.0025052198092453183
Epoch 160 - Training loss: 0.004031435115030036
Epoch 161 - Training loss: 0.003765407623723149
Epoch 162 - Training loss: 0.003769229492172599
Epoch 163 - Training loss: 0.0033856027643196286
Epoch 164 - Training loss: 0.0027864807459991427
Epoch 165 - Training loss: 0.002541032189037651
Epoch 166 - Training loss: 0.0023762786644510926
Epoch 167 - Training loss: 0.0024510148738045245
Epoch 168 - Training loss: 0.002333134668879211
Epoch 169 - Training loss: 0.00216487105935812
Epoch 170 - Training loss: 0.0021450545697007327
Epoch 171 - Training loss: 0.0020564877253491432
Epoch 172 - Training loss: 0.002027134079253301
Epoch 173 - Training loss: 0.0019071643706411122
Epoch 174 - Training loss: 0.0017809103766921908
Epoch 175 - Training loss: 0.001741571439197287
Epoch 176 - Training loss: 0.001663135556736961
Epoch 177 - Training loss: 0.0016502095735631884
Epoch 178 - Training loss: 0.0017417495080735534
Epoch 179 - Training loss: 0.0016922061913646758
Epoch 180 - Training loss: 0.0017748757207300514
Epoch 181 - Training loss: 0.001694059994770214
Epoch 182 - Training loss: 0.0016473475203383713
Epoch 183 - Training loss: 0.0016614385531283915
Epoch 184 - Training loss: 0.0016187061963137239
Epoch 185 - Training loss: 0.0016413686622399836
Epoch 186 - Training loss: 0.0016029031132347882
Epoch 187 - Training loss: 0.0017230270022992045
Epoch 188 - Training loss: 0.0019491093698889017
Epoch 189 - Training loss: 0.002607071289094165
Epoch 190 - Training loss: 0.0028626175422687083
Epoch 191 - Training loss: 0.002500115497969091
Epoch 192 - Training loss: 0.002182067732792348
Epoch 193 - Training loss: 0.0021225195901934057
Epoch 194 - Training loss: 0.0020160251355264337
Epoch 195 - Training loss: 0.0017971710534766317
Epoch 196 - Training loss: 0.0016672413563355803
Epoch 197 - Training loss: 0.0016109210439026356
Epoch 198 - Training loss: 0.0015507781354244798
Epoch 199 - Training loss: 0.0015310900751501321

Training Time = 54.38415336608887
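The loss trace occasionally spikes (for example around epochs 88-90 and 160-163), which is typical of Adam running at a fixed learning rate for many epochs. One optional variation, not used for the results above, is to decay the learning rate on a schedule:

In [ ]:
# Optional sketch (not used above): halve the learning rate every 50 epochs
optimizer = optim.Adam(model.parameters())
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
# then call scheduler.step() once per epoch, after the inner batch loop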
In [ ]:
# Save the entire model for later reuse (optional). Note that torch.save on a
# module pickles the whole object; saving model.state_dict() is the more
# portable convention, and .pt/.pth is the usual PyTorch file extension.
torch.save(model,'denoise_problem2.h5')
In [ ]:
# Load the model (optional)
model = torch.load('denoise_problem2.h5')
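Pickling the whole module, as above, ties the checkpoint to this exact class layout and PyTorch version. A more portable alternative (a sketch with a hypothetical filename, not used elsewhere in this notebook) is to save only the parameters:

In [ ]:
# Alternative sketch: save/restore only the weights
torch.save(model.state_dict(), 'denoise_problem2_state.pt')
model.load_state_dict(torch.load('denoise_problem2_state.pt', map_location=device))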

SNR calculation on train signals

In [ ]:
# Get the denoised output for the noisy training signal

with torch.no_grad():
    out = model(X_tensor.view(-1, 1, 20, 513))
out_numpy = out.cpu().numpy()
print("Number of output examples and features:", out_numpy.shape)
Number of output examples and features: (2459, 513)
In [ ]:
# Recover a complex STFT by applying the noisy signal's phase (X / |X|) to the
# predicted magnitudes, then invert with the ISTFT to get the clean waveform

X_norm = np.divide(X, X_abs)
S_pred = np.multiply(X_norm, out_numpy.T)
s_pred = librosa.istft(S_pred, hop_length=512)
print("Length of clean signal:",s_pred.shape)
Length of clean signal: (1258496,)
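Dividing X by its magnitude leaves a unit-modulus array carrying only the noisy phase, which the cell above multiplies by the predicted magnitudes. If any STFT bin were exactly zero the division would produce NaNs; a defensive variant (a sketch, not needed to reproduce the results above) clamps the denominator:

In [ ]:
# Defensive variant of the phase reuse: guard against zero-magnitude bins
eps = 1e-8
S_pred_safe = (X / np.maximum(X_abs, eps)) * out_numpy.T
s_pred_safe = librosa.istft(S_pred_safe, hop_length=512)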
In [ ]:
Audio(s_pred,rate=sr)
Out[ ]:
In [ ]:
# Calculate the SNR of the recovered training signal against the clean reference

SNR = 10*math.log10(np.sum(s[:len(s_pred)]**2)/np.sum((s[:len(s_pred)]-s_pred)**2))
print(SNR)
17.27724553822406
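The SNR above is 10*log10 of the clean signal's energy divided by the residual energy, with the clean reference truncated to the reconstruction's length. Wrapped as a helper for reuse (a convenience sketch; compute_snr is introduced here and not used elsewhere):

In [ ]:
def compute_snr(reference, estimate):
    # 10*log10(signal energy / residual energy), truncating to a common length
    n = min(len(reference), len(estimate))
    residual = reference[:n] - estimate[:n]
    return 10 * math.log10(np.sum(reference[:n]**2) / np.sum(residual**2))

print(compute_snr(s, s_pred))   # should reproduce the value printed above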

Output on Test signals

In [ ]:
# Load the noisy test signals, compute their STFTs, and take the magnitudes,
# which will be fed to the network

x_test1, sr_test1=librosa.load('test_x_01.wav', sr=None)
X_test1=librosa.stft(x_test1, n_fft=1024, hop_length=512)
X_test_abs1 = np.abs(X_test1)

x_test2, sr_test2=librosa.load('test_x_02.wav', sr=None)
X_test2=librosa.stft(x_test2, n_fft=1024, hop_length=512)
X_test_abs2 = np.abs(X_test2)
In [ ]:
# Transpose to (frames, frequency bins) to match the network input

X_test_T1 = X_test_abs1.T
X_test_T2 = X_test_abs2.T
In [ ]:
# Prepend 19 near-silent frames so the network output has the same number of frames as the input
rand_X1 = np.random.normal(0,0.001,(19,513))
rand_X2 = np.random.normal(0,0.001,(19,513))

rand_X_abs1 = np.abs(rand_X1)
rand_X_abs2 = np.abs(rand_X2)

X_T1 = np.concatenate((rand_X_abs1,X_test_T1))
X_T2 = np.concatenate((rand_X_abs2,X_test_T2))
In [ ]:
# Create 20-by-513 patches for test signal 1
X_test_patch1 = []
for i in range(X_T1.shape[0] - 19):
    X_test_patch1.append(X_T1[i:i+20, :])
In [ ]:
# Create 20-by-513 patches for test signal 2
X_test_patch2 = []
for i in range(X_T2.shape[0] - 19):
    X_test_patch2.append(X_T2[i:i+20, :])
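The two cells above differ only in the array they slide over; a small helper (a sketch; make_patches is introduced here for illustration only) captures the pattern:

In [ ]:
def make_patches(spec, width=20):
    # Sliding window of `width` consecutive frames over a (frames, bins) array
    return [spec[i:i+width, :] for i in range(spec.shape[0] - width + 1)]

# Equivalent to the two cells above:
# X_test_patch1 = make_patches(X_T1)
# X_test_patch2 = make_patches(X_T2)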
In [ ]:
X_test_tensor1 = torch.tensor(np.stack(X_test_patch1), dtype=torch.float32).to(device)
X_test_tensor2 = torch.tensor(np.stack(X_test_patch2), dtype=torch.float32).to(device)
In [ ]:
# Get the denoised outputs for the noisy test signals

with torch.no_grad():
    out1 = model(X_test_tensor1.view(-1,1,20,513))
out_numpy1 = out1.cpu().numpy()

with torch.no_grad():
    out2 = model(X_test_tensor2.view(-1,1,20,513))
out_numpy2 = out2.cpu().numpy()

print("Number of output ex and features for test signal 1:",out_numpy1.shape)
print("Number of output ex and features for test signal 2:",out_numpy2.shape)
Number of output ex and features for test signal 1: (142, 513)
Number of output ex and features for test signal 2: (380, 513)
In [ ]:
# Recover a complex STFT for test signal 1 by applying the noisy phase to the
# predicted magnitudes, then invert with the ISTFT

X_norm1 = np.divide(X_test1,X_test_abs1)
S_pred1 = np.multiply(X_norm1,out_numpy1.T)
s_pred1 = librosa.istft(S_pred1, hop_length=512)
print("Length of clean signal:",s_pred1.shape)
Length of clean signal: (72192,)
In [ ]:
Audio(s_pred1,rate=sr_test1)
Out[ ]:
In [ ]:
# Recover a complex STFT for test signal 2 by applying the noisy phase to the
# predicted magnitudes, then invert with the ISTFT

X_norm2 = np.divide(X_test2,X_test_abs2)
S_pred2 = np.multiply(X_norm2,out_numpy2.T)
s_pred2 = librosa.istft(S_pred2, hop_length=512)
print("Length of clean signal:",s_pred2.shape)
Length of clean signal: (194048,)
In [ ]:
Audio(s_pred2,rate=sr_test2)
Out[ ]:
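To keep the recovered signals beyond the notebook session, they can be written to disk with the soundfile package (an optional step; the output filenames below are placeholders):

In [ ]:
import soundfile as sf
# Hypothetical output filenames
sf.write('test_s_01_recons.wav', s_pred1, sr_test1)
sf.write('test_s_02_recons.wav', s_pred2, sr_test2)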